#!/usr/bin/env bash
#
# Configuration for the corpus split.
#
#   LANG_TAG  tag naming the corpus directory data/<LANG_TAG>_full.
#             Deliberately not called LANG: LANG is the locale variable read
#             by the shell and by child processes, and giving it a non-locale
#             value disturbs locale setup.
#   TR        size preset. "sm" selects a reduced split (50/10/1000 lines);
#             the two-digit presets set the training size in units of
#             10,000 lines.
#   LABEL     name of the resulting data set directory under data/.
LANG_TAG=ek
TR=30
LABEL=${LANG_TAG}-${TR}

# Default sizes, in lines; the preset selected by TR may override them.
TEST=1000
DEV=100
TRAIN=10000

case "${TR}" in
	sm) TEST=50; DEV=10; TRAIN=1000 ;;
	01) TRAIN=10000 ;;
	02) TRAIN=20000 ;;
	03) TRAIN=30000 ;;
	05) TRAIN=50000 ;;
	10) TRAIN=100000 ;;
	15) TRAIN=150000 ;;
	20) TRAIN=200000 ;;
	25) TRAIN=250000 ;;
	30) TRAIN=300000 ;;
	*)
		# No preset for this value: the defaults above stay in effect, but
		# say so, because LABEL will still carry the unrecognised TR.
		printf 'warning: no size preset for TR=%s; keeping TRAIN=%s\n' \
			"${TR}" "${TRAIN}" >&2
		;;
esac

# Report the sizes that will be used: test, dev, train.
echo "${TEST} ${DEV} ${TRAIN}"

# Input corpus (source and target side) and the final output directory.
SRC=data/${LANG_TAG}_full/src.utf8
TGT=data/${LANG_TAG}_full/tgt.utf8
OUT=data/${LABEL}

# Build the split in a fresh scratch directory named "out".
rm -rf out
mkdir -p out

# split_at COUNT INPUT HEAD_OUT REST_OUT
#   Writes the first COUNT lines of INPUT to HEAD_OUT and all following
#   lines to REST_OUT. REST_OUT always starts at line COUNT+1, so the two
#   outputs never share a line.
#   Returns non-zero if either head or tail fails.
split_at() {
	local count=$1 input=$2 head_out=$3 rest_out=$4
	head -n "${count}" "${input}" > "${head_out}" || return
	tail -n "+$((count + 1))" "${input}" > "${rest_out}"
}

# test: the first TEST lines of the corpus; the remainder goes to tmp1.
echo "TEST SET"
split_at "${TEST}" "${SRC}" out/src-eval.utf8 out/src-tmp1.utf8
split_at "${TEST}" "${TGT}" out/tgt-eval.utf8 out/tgt-tmp1.utf8

# dev: the next DEV lines; the remainder goes to tmp2.
echo "DEVELOPMENT SET"
split_at "${DEV}" out/src-tmp1.utf8 out/src-dev.utf8 out/src-tmp2.utf8
split_at "${DEV}" out/tgt-tmp1.utf8 out/tgt-dev.utf8 out/tgt-tmp2.utf8

# train: the next TRAIN lines. Whatever follows is not used, so no
# remainder file is written for this step.
echo "TRAIN SET"
head -n "${TRAIN}" out/src-tmp2.utf8 > out/src-train.utf8
head -n "${TRAIN}" out/tgt-tmp2.utf8 > out/tgt-train.utf8


# Disabled SGM conversion step, kept for reference.
# NOTE(review): these commands refer to out/src-test.utf8 and
# out/tgt-test.utf8, but the evaluation files in this script are written as
# out/src-eval.utf8 and out/tgt-eval.utf8 -- adjust the names before
# re-enabling.
#SGM=tools/bins/plain2sgm
#${SGM} -s $LABEL en kr out/src-test.utf8 out/src-test.sgm
#${SGM} -r $LABEL en kr out/tgt-test.utf8 out/ref-test.sgm
#rm -rf out/tgt-test.utf8

# Drop the intermediate split files from the scratch directory.
rm -rf -- out/*tmp?.utf8

# Replace any previous result with the freshly built directory. The :? guard
# stops the script here if OUT is unset or empty, before rm -rf or mv run.
rm -rf -- "${OUT:?OUT must name the destination directory}"
mv -- out "${OUT}"


